In [2]:
import pandas as pd

In [1]:
import numpy as np
from collections import OrderedDict
from bokeh.charts import Scatter
from bokeh.charts import Bar
from bokeh.charts import Histogram
from bokeh.plotting import output_notebook, show
output_notebook()
import datavis as dv


BokehJS successfully loaded.

In [61]:
from bokeh.palettes import brewer
from bokeh.models import LinearAxis, Range1d
from bokeh.plotting import *

In [4]:
gfti = pd.read_pickle("../CDPdata/gfti.pkl")

In [265]:
gfti = pd.read_pickle("../CDPdata/profiles2.pkl")

In [266]:
gfti.columns


Out[266]:
Index([u'year', u'Organisation', u'Country', u'Industry', u'Sector', u'scope1', u'scope2', u'Revenues', u'COGS', u'Equity', u'PPE', u'Assets', u'Income', u's1and2total', u's1and2intensity', u'hasabsolute', u'hasintensity', u'target type', u'lnintensity', u'pCOGS', u'pPPE', u'ROE', u'pcintensity', u'plateau', u'steady', u'Industrials', u'Consumer Discretionary', u'Materials', u'Information Technology', u'Consumer Staples', u'Utilities', u'Energy', u'ConsumerDiscretionary', u'InformationTechnology', u'ConsumerStaples', u'USA', u'UnitedKingdom', u'Japan', u'SouthAfrica', u'France', u'SouthKorea', u'Germany', u'country', u'lnCOGS', u'lnPPE'], dtype='object')

graph emissions reported totals by country and sector


In [267]:
# need scopeg to get counts and totals
# screen out 2008
sc = gfti.set_index("Organisation")
sc = sc[sc["year"].isin(range(2009,2014))]
scopeg = sc.groupby("year")

In [268]:
# get the counts
d = scopeg.describe()
d = d.reset_index()
scope12_dis = d[d["level_1"]=="count"][["year", "scope1"]]
scope12_dis.rename(columns={"scope1": "n"}, inplace=True)

In [269]:
t = scopeg.sum().reset_index() # .rename(columns={"scope1": "totaltons"})
t.set_index("year", inplace=True)
# tons to Mt CO2e
t["scope1"] = t["scope1"]/1000000
t["scope2"] = t["scope2"]/1000000
scope12_dis.set_index("year", inplace=True)
scope12_dis = scope12_dis.join(t)

In [270]:
scope12_dis


Out[270]:
n scope1 scope2 Revenues COGS Equity PPE Assets Income s1and2total ... ConsumerStaples USA UnitedKingdom Japan SouthAfrica France SouthKorea Germany lnCOGS lnPPE
year
2009 629 2541.707460 585.404580 6.413210e+08 4.484680e+08 3.020914e+08 2.617664e+08 7.319289e+08 31520976.299614 3.127112e+09 ... 68 215 86 59 25 28 24 28 -inf -inf
2010 787 3410.717664 671.261160 8.225650e+08 5.732981e+08 4.009627e+08 3.183774e+08 8.935984e+08 55229269.037530 4.081979e+09 ... 84 241 101 92 39 32 30 36 -inf -inf
2011 926 3896.411890 749.143890 9.241278e+08 6.576092e+08 4.659793e+08 3.760603e+08 1.019032e+09 54907687.240198 4.645556e+09 ... 94 278 111 101 43 36 37 35 -inf -inf
2012 1068 4323.619389 789.097193 1.084900e+09 7.813952e+08 5.554828e+08 5.127672e+08 1.288933e+09 46439420.963059 5.112717e+09 ... 108 312 125 116 46 45 47 40 -inf -inf
2013 1075 4135.568080 787.157610 1.299321e+09 9.287255e+08 6.690813e+08 6.536406e+08 1.629194e+09 63384044.558930 4.922726e+09 ... 107 313 125 112 47 49 50 39 -inf -inf

5 rows × 39 columns


In [243]:
years = []
for i in scope12_dis.index:
    years.append(str(i))

In [244]:
values12 = OrderedDict()
values12["Companies That Disclosed That Year"] = scope12_dis["n"].values
# values12["Scope 1"]= scope12_dis["scope1"].values
# values12["Scope 2"]= scope12_dis["scope2"].values

In [245]:
bar = Bar(values12, years, title="Companies in the Sample", filename="scope12_disclosure.html",
           stacked=False, ylabel="n", xlabel="Year", palette=brewer["Spectral"][7])
#yend = 10000
# bar.y_range.end = 6000
# bar.y_range.end = 2000
#bar.extra_y_ranges = {"Mtons": Range1d(start=0, end=yend)}
# bar.add_layout(LinearAxis(y_range_name="Mtons", axis_label="Mt CO2e"), 'right')
# bar.show()

In [246]:
show(bar)



In [271]:
scope12 = sc

In [438]:
sectors_groups = scope12.reset_index().groupby("Sector")
sector_ns = sectors_groups.apply(lambda g: len(g["Organisation"].value_counts().index))

In [440]:
sector_ns.to_csv("../CDPdata/sectorns.csv")

In [441]:
c_groups = scope12.reset_index().groupby("Country")
c_ns = c_groups.apply(lambda g: len(g["Organisation"].value_counts().index))

In [442]:
c_ns.to_csv("../CDPdata/countryns.csv")

graph things


In [364]:
scope12["Sector"] = scope12["Sector"].apply(lambda x: "Telecoms" if x=="Telecommunication Services" else x)

In [366]:
scope12["scope1"] = scope12["scope1"]/1000000
scope12["scope2"] = scope12["scope2"]/1000000

In [367]:
scope12_cs = scope12.groupby(["year","Country","Sector"])
scope12_c = scope12.groupby(["year", "Country"])
scope12_s = scope12.groupby(["year", "Sector"])

In [368]:
# get sums of emissions disclosed
scope12_csums = scope12_c.sum().reset_index().set_index('year')
scope12_ssums = scope12_s.sum().reset_index().set_index('year')

In [369]:
# get count of number of companies that reported
d = scope12_s.describe()
d = d.reset_index()
scope12_sdis = d[d["level_2"]=="count"][["year", "Sector","scope1", "scope2"]]
scope12_sdis.set_index("year",inplace=True)

In [370]:
# get count of number of companies that reported by country
d = scope12_c.describe()
d = d.reset_index()
scope12_cdis = d[d["level_2"]=="count"][["year", "Country","scope1", "scope2"]]
scope12_cdis.set_index("year",inplace=True)

In [371]:
scope12_sdis.head()


Out[371]:
Sector scope1 scope2
year
2009 Consumer Discretionary 88 88
2009 Consumer Staples 68 68
2009 Energy 31 31
2009 Financials 30 30
2009 Health Care 40 40

In [372]:
g = scope12.reset_index().groupby("Country").sum().sort("scope1", ascending=0)
h = scope12.reset_index().groupby("Sector").sum().sort("scope1", ascending=0)
countries = g.index.tolist()
sectors = h.index.tolist()

In [373]:
sectors = scope12_sdis.loc[2010].sort("scope2", ascending= 0)["Sector"].tolist()
# sectors = scope12_sdis["Sector"].value_counts().index

In [390]:
countries = scope12_cdis.loc[2010].sort("scope2", ascending= 0)["Country"].tolist()

In [392]:
# run for sums data
scope1_csums_cats = dv.separate_cats(scope12_csums, "Country", countries, "scope1").fillna(0)
scope1_ssums_cats = dv.separate_cats(scope12_ssums, "Sector", sectors, "scope1").fillna(0)
scope2_csums_cats = dv.separate_cats(scope12_csums, "Country", countries, "scope2").fillna(0)
scope2_ssums_cats = dv.separate_cats(scope12_ssums, "Sector", sectors, "scope2").fillna(0)

In [401]:
scope12_cdis_cats


Out[401]:
USA United Kingdom Japan South Africa Germany France South Korea Sweden Switzerland Spain ... New Zealand Russia Israel Colombia Luxembourg Chile Singapore Austria Hungary Cyprus
year
2009 215 86 59 25 28 28 24 23 14 15 ... 1 1 1 1 1 1 0 1 0 0
2010 241 101 92 39 36 32 30 23 18 18 ... 2 2 2 2 2 1 1 1 1 1
2011 278 111 101 43 35 36 37 26 20 21 ... 4 2 2 2 2 1 1 4 1 1
2012 312 125 116 46 40 45 47 30 24 22 ... 6 2 2 2 2 2 2 6 1 0
2013 313 125 112 47 39 49 50 31 25 23 ... 6 1 2 3 2 2 2 6 1 1

5 rows × 34 columns


In [393]:
# can run for dis(closure) or sums data
scope12_cdis_cats = dv.separate_cats(scope12_cdis, "Country", countries, "scope1").fillna(0)
scope12_sdis_cats = dv.separate_cats(scope12_sdis, "Sector", sectors, "scope1").fillna(0)

In [111]:
# for stacked area chart

# max number of brewer colors is 11
s1_csums_vals = dv.stacked_cols(scope1_csums_cats, countries)
# s1_ssums_vals = dv.stacked_cols(scope1_ssums_cats, sectors)
# colors = brewer["Spectral"][len(sectors)]
colors = brewer["Spectral"][11]
x2 = np.hstack((range(2009,2014)[::-1], range(2009,2014)))

title = "Scope 1 CO2e Disclosed by Country"
# title = "Scope 1 CO2e Disclosed by Sector"
# title = "Companies Disclosing Scope 1 by Country"
s = figure(title = title)
s.patches([x2 for a in s1_csums_vals], list(s1_csums_vals.values()),
          color=colors, alpha=0.8, line_color=None)


Out[111]:
<bokeh.plotting.Figure at 0x7faa07eacbd0>

In [385]:
s1_csums_vs = dv.prep_stacked_bar(scope1_csums_cats, countries)
s2_csums_vs = dv.prep_stacked_bar(scope2_csums_cats, countries)
s1_ssums_vs = dv.prep_stacked_bar(scope1_ssums_cats, sectors)
s2_ssums_vs = dv.prep_stacked_bar(scope2_ssums_cats, sectors)

In [402]:
s12_cdis_vs = dv.prep_stacked_bar(scope12_cdis_cats, countries)
s12_sdis_vs = dv.prep_stacked_bar(scope12_sdis_cats, sectors)

In [384]:
reload(dv)


Out[384]:
<module 'datavis' from 'datavis.py'>

In [394]:
s1cbar = Bar(s1_csums_vs, years + ["", ""], title="Scope 1 CO2e Disclosed by Country", legend="bottom_right",
           stacked=True, ylabel="Mt CO2e", xlabel="Year", palette=brewer["Spectral"][11])

s2cbar = Bar(s2_csums_vs, years + ["", ""], title="Scope 2 CO2e Disclosed by Country", legend="bottom_right",
           stacked=True, ylabel="Mt CO2e", xlabel="Year", palette=brewer["Spectral"][11])

s1sbar = Bar(s1_ssums_vs, years + ["", ""], title="Scope 1 CO2e Disclosed by Sector", legend="bottom_right",
           stacked=True, ylabel="Mt CO2e", xlabel="Year", palette=brewer["Spectral"][11])

s2sbar = Bar(s2_ssums_vs, years + ["", ""], title="Scope 2 CO2e Disclosed by Sector", legend="bottom_right",
           stacked=True, ylabel="Mt CO2e", xlabel="Year", palette=brewer["Spectral"][11])

In [403]:
s12cdbar = Bar(s12_cdis_vs, years + ["", ""], title="Companies That Disclosed by Country", legend="bottom_right",
           stacked=True, ylabel="n", xlabel="Year", palette=brewer["Spectral"][11])

s12sdbar = Bar(s12_sdis_vs, years + ["", ""], title="Companies That Disclosed by Sector", legend="bottom_right",
           stacked=True, ylabel="n", xlabel="Year", palette=brewer["Spectral"][11])

In [400]:
print countries


[u'USA', u'United Kingdom', u'Japan', u'South Africa', u'Germany', u'France', u'South Korea', u'Sweden', u'Switzerland', u'Spain', u'Brazil', u'Taiwan', u'Finland', u'Norway', u'India', u'Denmark', u'Italy', u'Netherlands', u'Belgium', u'Portugal', u'Australia', u'Ireland', u'Hong Kong', u'Turkey', u'New Zealand', u'Russia', u'Israel', u'Colombia', u'Luxembourg', u'Chile', u'Singapore', u'Austria', u'Hungary', u'Cyprus']

In [404]:
show(s12cdbar)


histograms of info for each year


In [283]:
gftip=gfti[~gfti["pcintensity"].isnull()]

In [287]:
gftip["pc100"] = gftip["pcintensity"]*100


-c:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [300]:
summary = gftip[['year','pc100']].groupby("year").describe()

In [312]:
mean = summary.reset_index().set_index(["level_1","year"]).sort_index().loc["mean"]
median = summary.reset_index().set_index(["level_1","year"]).sort_index().loc["50%"]

In [314]:
mean.rename(columns={"pc100":"mean"}, inplace=True)
median.rename(columns={"pc100":"median"}, inplace=True)

In [320]:
pc_avgs= mean.join(median).reset_index()

In [321]:
pc_avgs.to_csv("../CDPdata/pc_avgs.csv")

In [25]:
gftip = dv.prep_forhist(gftip, "pcintensity", -1, 1)

In [322]:
gftip = dv.prep_forhist(gftip, "pc100", -100, 100)

In [323]:
gftip.set_index("year",inplace=True)

In [33]:
plateaus = gftip[gftip["plateau"]]
steady = gftip[gftip["steady"]]

In [36]:
plateau_values = OrderedDict()
for yr in range(2010,2014):
    plateau_values[str(yr)] = plateaus.loc[yr]["pcintensity"].tolist()

In [37]:
steady_values = OrderedDict()
for yr in range(2010,2014):
    steady_values[str(yr)] = steady.loc[yr]["pcintensity"].tolist()

In [324]:
intensity_values = OrderedDict()
for yr in range(2010,2014):
    intensity_values[str(yr)] = gftip.loc[yr]["pc100"].tolist()

In [408]:
len(intensity_values["2013"])


Out[408]:
1047

In [350]:
colors = brewer["BuPu"][5][0:4]
colors.reverse()

In [425]:
# fname = "2012 had inttarget vs int change.html"
title =  "Annual Percent Intensity Change"
fname = "12intchangeyear.html"
hist = Histogram(intensity_values, bins=40, filename=fname, title = title, ylabel = "Density", xlabel = "% Change",legend=True)
# hist = Histogram(hiall_values, bins=30, filename=fname, title = title, ylabel = "", xlabel = "",legend=True)

In [426]:
show(hist)



In [ ]: